# Load the pairwise comparison scores.
scores <- read.csv("hw1_list.csv")

# Label each comparison: a pair whose two sequence names share the same first
# six characters is "Genuine"; any other pair is "Imposter".
# NOTE(review): presumably the six-character prefix identifies the subject --
# confirm against the naming scheme of the data set.
scores$Match <- with(
  scores,
  ifelse(
    substr(sequence1, 1, 6) == substr(sequence2, 1, 6),
    "Genuine",
    "Imposter"
  )
)

# Split into one data frame per class. which() drops rows whose label is NA,
# which is also what subset() does.
genuine.scores <- scores[which(scores$Match == "Genuine"), , drop = FALSE]
imposter.scores <- scores[which(scores$Match == "Imposter"), , drop = FALSE]
# Overlay the genuine and imposter score distributions as semi-transparent
# density histograms on one set of axes.
#
# Both histograms are computed first (plot = FALSE) so that the axis limits can
# cover both of them; otherwise the limits come from the genuine scores alone
# and the imposter bars added afterwards can be cut off. local() keeps the
# temporaries out of the global environment.
local({
  genuine.col <- rgb(1, 0, 0, 0.5)
  imposter.col <- rgb(0, 0, 1, 0.5)

  genuine.hist <- hist(genuine.scores$score, plot = FALSE)
  imposter.hist <- hist(imposter.scores$score, plot = FALSE)

  x.limits <- range(genuine.hist$breaks, imposter.hist$breaks)
  y.limits <- c(0, max(genuine.hist$density, imposter.hist$density))

  # freq = FALSE draws densities, so the y axis is labelled accordingly.
  plot(
    genuine.hist,
    freq = FALSE,
    col = genuine.col,
    xlim = x.limits,
    ylim = y.limits,
    main = "Genuine and Imposter Distribution",
    xlab = "Distance Scores",
    ylab = "Density"
  )
  plot(imposter.hist, freq = FALSE, col = imposter.col, add = TRUE)
  legend("top", c("Genuine", "Imposter"), fill = c(genuine.col, imposter.col))
})
# Summary statistics of the score column for each class.
genuine.mean <- mean(genuine.scores$score)
imposter.mean <- mean(imposter.scores$score)
genuine.sd <- sd(genuine.scores$score)
imposter.sd <- sd(imposter.scores$score)

# d-prime: sqrt(2) * |mean_g - mean_i| / sqrt(sd_g^2 + sd_i^2), i.e. the
# absolute difference of the class means scaled by the combined spread.
dprime <- sqrt(2) * abs(genuine.mean - imposter.mean) /
  sqrt(genuine.sd^2 + imposter.sd^2)
# D-prime is approximately 4.4899.
# Detection Error Tradeoff (DET): sweep the decision threshold, record both
# error rates at every threshold, then draw the whole curve in a single plot.
# Calling plot() inside the sweep would instead open one single-point figure
# per threshold and keep only the last threshold's rates.
thresholds <- seq(0.00, 0.99, 0.02)

# FAR: fraction of imposter scores at or above the threshold.
# FRR: fraction of genuine scores below the threshold.
# NOTE(review): this treats higher scores as "accept", while the histogram
# labels the scores as distances -- confirm the comparison direction.
# na.rm = TRUE leaves NA scores out of the numerator only, as subset() would.
FAR <- vapply(
  thresholds,
  function(threshold) {
    sum(imposter.scores$score >= threshold, na.rm = TRUE) /
      nrow(imposter.scores)
  },
  numeric(1)
)
FRR <- vapply(
  thresholds,
  function(threshold) {
    sum(genuine.scores$score < threshold, na.rm = TRUE) /
      nrow(genuine.scores)
  },
  numeric(1)
)

# One row per threshold, in the same order as `thresholds`.
DET <- data.frame(FAR, FRR)
plot(
  DET,
  xlim = range(0, max(FAR)),
  ylim = range(0, max(FRR)),
  main = "Detection Error Tradeoff"
)
# Equal Error Rate